
abba <- haven::read_dta(paste0(here::here(), "/replication/data/abba_seeding.dta"))

abba$seeded_ind <- ifelse(abba$uidcount > 0 & !is.na(abba$uidcount), 1, 0)
abba$seeded_ind <- ifelse(abba$uidcount == 0, 0, abba$seeded_ind)

p1 <- abba %>% 
  tidyr::pivot_longer(cols = c("seeded_ind", "seed_ind1")) %>% 
  dplyr::select(name, value, g7_pred_ann_total_income_y0) %>% 
  dplyr::filter(!is.na(value) & !is.na(g7_pred_ann_total_income_y0)) %>%
  ggplot(., mapping = aes(x = g7_pred_ann_total_income_y0, y = ..density.. * 10000, fill = factor(value), color = factor(value))) + 
  geom_histogram(position = "identity", alpha = 0.5) +
  scale_y_continuous(limits = c(0, 0.4)) + 
  scale_color_manual(values = c("1" = NA, "0" = "black")) +
  scale_fill_manual(values = c("1" = "steelblue", "0" = "white")) +
  labs(x = "Predicted annual income (Rupees)", 
       y = "Proportion of households", 
       fill = "", color = "",
       title = "Panel A: Expected income") +
  facet_wrap(. ~ factor(name, levels = c("seeded_ind", "seed_ind1")), labeller = as_labeller(c("seed_ind1" = "Seeded in October 2016", "seeded_ind" = "Seeded at baseline"))) +
  theme_bw() + 
  theme(
    strip.background = element_blank(),
    strip.text = element_text(hjust = 0), 
    aspect.ratio = 1/2, 
    legend.position = "none")
p1

p2 <- abba %>% 
  tidyr::pivot_longer(cols = c("seeded_ind", "seed_ind1")) %>% 
  dplyr::select(name, value, a7_education_max_avg) %>% 
  dplyr::filter(!is.na(value) & !is.na(a7_education_max_avg)) %>%
  ggplot(.) +   
  geom_histogram(mapping = aes(
    x = a7_education_max_avg, 
    y = ..density.. * 0.5, 
    fill  = factor(value), 
    color = factor(value)), 
    position = "identity",
    alpha = 0.5) +
  scale_y_continuous(limits = c(0, 0.2)) +
  scale_color_manual(values = c("1" = NA, "0" = "black"),         labels = c("1" = "Seeded", "0" = "Unseeded")) +
  scale_fill_manual(values = c("1" = "steelblue", "0" = "white"), labels = c("1" = "Seeded", "0" = "Unseeded")) +
  labs(x = "Average years of education", 
       y = "Proportion of households", 
       fill = "", color = "", 
       title = "Panel B: Years of schooling") +
  facet_wrap(. ~ factor(name, levels = c("seeded_ind", "seed_ind1")), labeller = as_labeller(c("seed_ind1" = "Seeded in October 2016", "seeded_ind" = "Seeded at baseline"))) +
  theme_bw() + 
  theme(
    strip.background = element_blank(),
    strip.text = element_text(hjust = 0), 
    aspect.ratio = 1/2, 
    legend.position = "none")
p2

legend <- get_legend(p2 + theme(legend.position = "bottom"))

# Standardize all plots so they look good together
g1 <- ggplotGrob(p1)
g2 <- ggplotGrob(p2)
maxWidth <- grid::unit.pmax(g1$widths[2:5], 
                            g2$widths[2:5]
)
g1$widths[2:5] <- as.list(maxWidth)
g2$widths[2:5] <- as.list(maxWidth)

full <- gridExtra::grid.arrange(g1, g2, legend, ncol = 1, heights = c(5, 5, 1), layout_matrix = rbind(c(1, 1), c(2, 2), 3))

ggsave(plot = full, filename = paste0(here::here(), "/replication/exhibits/FigureA_3_new.pdf"))

### Means table for Karthik
abba %>% 
  dplyr::left_join(., dplyr::select(el1, uid, f1_religion_hh, f4_roof_mtrl, f6_land_area_1)) %>% 
  dplyr::mutate(muslim = ifelse(f1_religion_hh == 2, 1, 0),
                muslim = ifelse(is.na(f1_religion_hh), NA, muslim),
                SC     = ifelse(f2_caste_hh == 1, 1, 0),
                SC     = ifelse(is.na(f2_caste_hh), NA, SC),
                ST     = ifelse(f2_caste_hh == 2, 1, 0),
                ST     = ifelse(is.na(f2_caste_hh), NA, ST),
                thatch = ifelse(f4_roof_mtrl == 3, 1, 0),
                thatch = ifelse(is.na(f4_roof_mtrl), NA, thatch)) %>%
  tidyr::pivot_longer(cols = c("seeded_ind", "seed_ind1")) %>% 
  dplyr::select(name, value, g7_pred_ann_total_income_y0, a7_education_max_avg, muslim, SC, ST, thatch, pweight) %>% 
  dplyr::filter(!is.na(value)) -> corr_dat

corr_dat %>%
  dplyr::group_by(name, value) %>% 
  dplyr::summarise(income = weighted.mean(g7_pred_ann_total_income_y0, na.rm = TRUE, w = pweight),
                   educ   = weighted.mean(a7_education_max_avg, na.rm = TRUE, w = pweight),
                   muslim = weighted.mean(muslim, na.rm = TRUE, w = pweight), 
                   SC     = weighted.mean(SC, na.rm = TRUE, w = pweight), 
                   ST     = weighted.mean(ST, na.rm = TRUE, w = pweight), 
                   thatch = weighted.mean(thatch, na.rm = TRUE, w = pweight)) %>% 
  dplyr::filter(!is.na(value))

pval_list <- list()
for(i in c("g7_pred_ann_total_income_y0", "a7_education_max_avg", "muslim", "SC", "ST", "thatch")){
  pval_list[[paste0(i, "_seed_ind1")]] <- weights::wtd.t.test(x       = corr_dat[[i]][corr_dat$name == "seed_ind1" & corr_dat$value == 0],
                                                              y       = corr_dat[[i]][corr_dat$name == "seed_ind1" & corr_dat$value == 1],
                                                              weight  = corr_dat$pweight[corr_dat$name == "seed_ind1" & corr_dat$value == 0],
                                                              weighty = corr_dat$pweight[corr_dat$name == "seed_ind1" & corr_dat$value == 1])
  pval_list[[paste0(i, "_seeded_ind")]] <- weights::wtd.t.test(x       = corr_dat[[i]][corr_dat$name == "seeded_ind" & corr_dat$value == 0],
                                                               y       = corr_dat[[i]][corr_dat$name == "seeded_ind" & corr_dat$value == 1],
                                                               weight  = corr_dat$pweight[corr_dat$name == "seeded_ind" & corr_dat$value == 0],
                                                               weighty = corr_dat$pweight[corr_dat$name == "seeded_ind" & corr_dat$value == 1])
}

for(i in c("g7_pred_ann_total_income_y0", "a7_education_max_avg", "muslim", "SC", "ST", "thatch")){
  print(weights::wtd.t.test(x       = corr_dat[[i]][corr_dat$name == "seed_ind1" & corr_dat$value == 0],
                            y       = corr_dat[[i]][corr_dat$name == "seeded_ind" & corr_dat$value == 0],
                            weight  = corr_dat$pweight[corr_dat$name == "seed_ind1" & corr_dat$value == 0],
                            weighty = corr_dat$pweight[corr_dat$name == "seeded_ind" & corr_dat$value == 0]))
  print(weights::wtd.t.test(x       = corr_dat[[i]][corr_dat$name == "seed_ind1" & corr_dat$value == 1],
                            y       = corr_dat[[i]][corr_dat$name == "seeded_ind" & corr_dat$value == 1],
                            weight  = corr_dat$pweight[corr_dat$name == "seed_ind1" & corr_dat$value == 1],
                            weighty = corr_dat$pweight[corr_dat$name == "seeded_ind" & corr_dat$value == 1]))
}

# Seed_ind1 --> october 2016
# Seeded_ind --> Baseline

x <- data.table::rbindlist(lapply(X = c("g7_pred_ann_total_income_y0", "a7_education_max_avg", "muslim", "SC", "ST", "thatch"), 
                                  FUN = function(x) as.data.frame(t(round(as.numeric(unlist(pval_list[[paste0(x, "_seed_ind1")]])[c(4:7)]), 2))))) %>% 
  dplyr::select(V3, V4, V2, V1)

y <- data.table::rbindlist(lapply(X = c("g7_pred_ann_total_income_y0", "a7_education_max_avg", "muslim", "SC", "ST", "thatch"), 
                                  FUN = function(x) as.data.frame(t(round(as.numeric(unlist(pval_list[[paste0(x, "_seeded_ind")]])[c(4:7)]), 2))))) %>% 
  dplyr::select(V3, V4, V2, V1)

xx <- rbind(y,x) %>% 
  as.data.frame() %>%
  dplyr::rename(
    "Unseeded" = V3, 
    "Seeded" = V4, 
    "Difference" = V2, 
    "p-value" = V1) %>%
  dplyr::mutate(
    Difference = dplyr::case_when(
      `p-value` <= 0.01 ~ paste0(Difference, "ASTRSKASTRSKASTRSK"),
      `p-value` <= 0.05 ~ paste0(Difference, "ASTRSKASTRSK"),
      `p-value` <= 0.1 ~ paste0(Difference, "ASTRSK"),
      TRUE ~ as.character(Difference)
    )
  ) %>%
  as.data.frame(.) 
xx$Outcome <- rep(c("Expected income",
                  "Years of schooling",
                  "Muslim",
                  "SC", 
                  "ST", 
                  "Thatched roof"), 2) 

xx <- dplyr::select(xx, Outcome, dplyr::everything())
xx_sub <- dplyr::filter(xx, Outcome %in% c("Years of schooling", "SC", "ST")) %>% 
  as.matrix(.)

xx_tab <- stargazer::stargazer(
  xx_sub,
  type = "latex", 
  align = FALSE, 
  digits = 2,
  column.sep.width = "5pt",
  df = FALSE, 
  digits.extra = 2,
  nobs = TRUE,
  float = FALSE,
  header = FALSE,
  model.numbers = TRUE,
  multicolumn = FALSE,
  dep.var.caption = "",
  title = "", 
  label = "",
  notes.append = FALSE, 
  notes.align = "l",
  notes = "")

xx_tab <- stringr::str_replace_all(string = xx_tab, pattern = "p-value", replacement = "$p$-value")

xx_tab <- 
  starpolishr::star_insert_row(
    xx_tab, 
    c("\\cmidrule(lr){1-1} \\cmidrule(lr){2-2}  \\cmidrule(lr){3-3}  \\cmidrule(lr){4-4}  \\cmidrule(lr){5-5}"),
    insert.after = c(5)
  )

xx_tab <- 
  starpolishr::star_insert_row(
    xx_tab, 
    c(" \\multicolumn{1}{c}{(1)} & \\multicolumn{1}{c}{(2)} & \\multicolumn{1}{c}{(3)} & \\multicolumn{1}{c}{(4)} & \\multicolumn{1}{c}{(5)}\\\\"),
    insert.after = c(6)
  )

# Add panel
xx_tab <- 
  starpolishr::star_insert_row(xx_tab, c("\\\\[-2.0ex] \\multicolumn{8}{@{} l}{\\textit{Panel A: Seeded at Baseline}}\\\\ \\\\[-1.5ex]"), insert.after = c(8)
  )
xx_tab <- 
  starpolishr::star_insert_row(xx_tab, c("\\hline \\\\[-2.0ex] \\multicolumn{8}{@{} l}{\\textit{Panel B: Seeded in October 2016}}\\\\ \\\\[-1.5ex]"), insert.after = c(12))

xx_tab <- stringr::str_replace_all(string = xx_tab, pattern = "Years of schooling", replacement = "\\\\multicolumn{1}{@{} l}{Years of schooling}")
xx_tab <- stringr::str_replace_all(string = xx_tab, pattern = "SC ", replacement = "\\\\multicolumn{1}{@{} l}{SC}")
xx_tab <- stringr::str_replace_all(string = xx_tab, pattern = "ST ", replacement = "\\\\multicolumn{1}{@{} l}{ST}")
 
xx_tab <- stringr::str_replace_all(string = xx_tab, pattern = "ASTRSK", replacement = "*")


# Save
starpolishr::star_tex_write(
  starlist = xx_tab,
  file = paste0(here::here(), "/replication/exhibits/ses_by_seed.tex"), 
  headers = FALSE
)

